<?php
/**
 * HSV - Hierarchical Separated Values
 *
 * A text-based file format and streaming protocol using ASCII control characters.
 * Unlimited nesting (like JSON). No escaping required. Binary data supported.
 *
 * Copyright 2026 Danslav Slavenskoj, Lingenic LLC
 * License: CC0 1.0 - Public Domain
 * https://creativecommons.org/publicdomain/zero/1.0/
 * You may use this code for any purpose without attribution.
 *
 * Spec: https://hsvfile.com
 * Repo: https://github.com/LingenicLLC/HSV
 */

namespace HSV;

// Control characters
// Each constant is one ASCII control byte. The notes describe the role the
// parser code visible in this file gives to that byte.
const SOH = "\x01"; // Start of Header: opens a header, which runs up to the next STX
const STX = "\x02"; // Start of Text (data block); after DLE it opens a binary section
const ETX = "\x03"; // End of Text: closes a data block; after DLE it closes a binary section
const EOT = "\x04"; // End of Transmission (declared; not referenced by the code visible here)
const SO  = "\x0e"; // Shift Out (start nested)
const SI  = "\x0f"; // Shift In (end nested)
const DLE = "\x10"; // Data Link Escape (binary mode): DLE+STX ... DLE+ETX, with DLE+DLE as a literal DLE
const FS  = "\x1c"; // File/Record Separator: separates records inside a data block
const GS  = "\x1d"; // Group/Array Separator: separates the elements of an array value
const RS  = "\x1e"; // Record/Property Separator: separates the properties of an object
const US  = "\x1f"; // Unit/Key-Value Separator: separates a property key from its value

/**
 * Decode HSV text into a document.
 *
 * Bytes that are not part of a SOH...STX header or an STX...ETX data block are
 * skipped. Each data block appends its non-empty records to the result, so
 * several blocks in one input accumulate. Each SOH...STX header overwrites the
 * previously stored header. A data block with no closing ETX yields no records.
 *
 * @param string $text HSV encoded text
 * @return array{header: array|null, records: array[]}
 */
function parse(string $text): array {
    // Swap binary sections for placeholders so their bytes cannot be read as framing.
    [$text, $binaries] = extractBinarySections($text);

    $header = null;
    $records = [];
    $length = strlen($text);
    $pos = 0;

    while ($pos < $length) {
        $marker = $text[$pos];

        // Anything that does not open a header or a data block is ignored.
        if ($marker !== SOH && $marker !== STX) {
            $pos++;
            continue;
        }

        // For a bare STX the data block starts right here; for SOH it starts
        // at the STX that terminates the header.
        $blockStart = $pos;
        if ($marker === SOH) {
            $blockStart = strpos($text, STX, $pos + 1);
            if ($blockStart === false) {
                // Header never terminated: treat the SOH as noise.
                $pos++;
                continue;
            }

            $headerText = substr($text, $pos + 1, $blockStart - $pos - 1);
            $header = parseObject($headerText, $binaries);
        }

        $blockEnd = strpos($text, ETX, $blockStart + 1);
        if ($blockEnd === false) {
            // Unterminated data block: step past its STX and keep scanning.
            $pos = $blockStart + 1;
            continue;
        }

        $payload = substr($text, $blockStart + 1, $blockEnd - $blockStart - 1);
        foreach (splitRespectingNesting($payload, FS) as $chunk) {
            $record = parseObject($chunk, $binaries);
            if ($record !== []) {
                $records[] = $record;
            }
        }

        $pos = $blockEnd + 1;
    }

    return [
        'header' => $header,
        'records' => $records,
    ];
}

/**
 * Extract DLE+STX...DLE+ETX binary sections and replace them with placeholders.
 *
 * Inside a section, DLE+DLE decodes to one literal DLE and every other byte is
 * copied unchanged. A DLE+STX opener with no closing DLE+ETX is not treated as
 * a section: its DLE is emitted as ordinary text and scanning resumes at the
 * following byte.
 *
 * NOTE(review): placeholders are plain "\0BINARY<n>\0" text spliced into the
 * stream, so input that already contains such a sequence outside a binary
 * section cannot be told apart from a real placeholder.
 *
 * @param string $text Raw HSV input
 * @return array{0: string, 1: array<string, string>} The text with every binary
 *         section replaced by its placeholder, and a placeholder => decoded payload map
 */
function extractBinarySections(string $text): array {
    $result = '';
    $binaries = [];
    $len = strlen($text);
    $i = 0;

    while ($i < $len) {
        $opensSection = $text[$i] === DLE && $i + 1 < $len && $text[$i + 1] === STX;
        if (!$opensSection) {
            $result .= $text[$i];
            $i++;
            continue;
        }

        $payload = '';
        $closed = false;
        $j = $i + 2;

        while ($j < $len) {
            if ($text[$j] === DLE && $j + 1 < $len) {
                $next = $text[$j + 1];
                if ($next === ETX) {
                    $closed = true;
                    break;
                }
                if ($next === DLE) {
                    // Escaped DLE: decode the pair to a single literal byte.
                    $payload .= DLE;
                    $j += 2;
                    continue;
                }
            }
            $payload .= $text[$j];
            $j++;
        }

        if ($closed) {
            $placeholder = "\0BINARY" . count($binaries) . "\0";
            // The payload is already fully decoded by the loop above. Running a
            // second unescape pass over it would collapse two adjacent literal
            // DLE bytes into one and corrupt the data.
            $binaries[$placeholder] = $payload;
            $result .= $placeholder;
            $i = $j + 2; // skip the closing DLE+ETX
        } else {
            // No terminator before end of input: keep the DLE as plain text.
            $result .= $text[$i];
            $i++;
        }
    }

    return [$result, $binaries];
}

/**
 * Undo DLE escaping: every DLE+DLE pair becomes a single DLE.
 *
 * Pairs are matched left to right without overlap, so an odd run of DLE bytes
 * keeps its final, unpaired DLE. All other bytes are returned unchanged.
 */
function unescapeBinary(string $data): string {
    // str_replace scans once, left to right, and never re-examines bytes it has
    // just written, which matches a manual pair-by-pair walk.
    return str_replace(DLE . DLE, DLE, $data);
}

/**
 * Replace binary placeholders with actual binary data.
 *
 * All placeholders are substituted in a single pass over $value. Bytes that
 * come from an inserted payload are never re-examined, so a payload that
 * happens to contain another section's placeholder text is returned intact.
 *
 * @param string $value Text that may contain placeholders
 * @param array<string, string> $binaries Map of placeholder => binary payload
 * @return string $value with every placeholder occurrence replaced by its payload
 */
function restoreBinaries(string $value, array $binaries): string {
    if ($binaries === []) {
        return $value;
    }

    // strtr() with a replacement map works on the original string only; a loop
    // of str_replace() calls would also rewrite payloads inserted by earlier
    // iterations.
    return strtr($value, $binaries);
}

/**
 * Split string by separator, respecting SO/SI nesting.
 *
 * Only separators found at nesting depth zero split the text; SO raises the
 * depth and SI lowers it. SO and SI bytes stay in the returned pieces. An SI
 * with no open SO is kept as data and leaves the depth at zero, so one stray
 * closer cannot stop every later separator from being recognised.
 *
 * @param string $text Text to split
 * @param string $sep Single-byte separator (compared against one byte at a time)
 * @return string[] The pieces in order; an empty $text yields an empty array,
 *         and a trailing separator yields a trailing empty piece
 */
function splitRespectingNesting(string $text, string $sep): array {
    $parts = [];
    $depth = 0;
    $start = 0; // offset where the piece currently being scanned begins
    $len = strlen($text);

    for ($i = 0; $i < $len; $i++) {
        $c = $text[$i];

        if ($c === SO) {
            $depth++;
        } elseif ($c === SI) {
            // Clamp at zero: an unmatched SI must not push the depth negative.
            if ($depth > 0) {
                $depth--;
            }
        } elseif ($c === $sep && $depth === 0) {
            $parts[] = substr($text, $start, $i - $start);
            $start = $i + 1;
        }
    }

    $tail = substr($text, $start);
    if ($tail !== '' || $parts !== []) {
        $parts[] = $tail;
    }

    return $parts;
}

/**
 * Parse a value, handling arrays (GS) and nested structures (SO/SI).
 *
 * Structure is detected on the text while binary sections are still
 * placeholders, and payloads are restored only in plain (leaf) values. Payload
 * bytes that equal SO, SI, GS, RS or US are therefore never read as structure.
 *
 * Order of checks:
 *  1. A GS at nesting depth zero makes the value an array; each element is
 *     parsed recursively. This runs first so that an array of nested objects
 *     (SO..SI GS SO..SI), which also starts with SO and ends with SI, is not
 *     mistaken for one object.
 *  2. A value wrapped in SO...SI is parsed as a nested object.
 *  3. Anything else is returned as a string with binary payloads restored.
 *
 * Recursion happens only on strictly shorter pieces, so a GS that sits inside
 * an unbalanced or non-wrapping SO group cannot cause unbounded recursion.
 *
 * @param string $value Raw value text, possibly containing binary placeholders
 * @param array<string, string> $binaries Map of placeholder => binary payload
 * @return mixed string for a plain value, list for an array, associative array for an object
 */
function parseValue(string $value, array $binaries): mixed {
    // Check for array: only a GS outside every SO/SI group separates elements.
    if (str_contains($value, GS)) {
        $parts = splitRespectingNesting($value, GS);
        if (count($parts) > 1) {
            return array_map(fn($p) => parseValue($p, $binaries), $parts);
        }
    }

    // Check for nested structure (SO at start, SI at end)
    $len = strlen($value);
    if ($len >= 2 && $value[0] === SO && $value[$len - 1] === SI) {
        return parseObject(substr($value, 1, -1), $binaries);
    }

    // Leaf value: now it is safe to put the binary payloads back.
    return restoreBinaries($value, $binaries);
}

/**
 * Build an associative array from RS-separated "key US value" properties.
 *
 * A property with no US at nesting depth zero is skipped. Only the first such
 * US separates key from value; any further ones stay part of the value. When a
 * key occurs more than once, the last occurrence wins.
 *
 * @param string $content Object body (without surrounding SO/SI)
 * @param array $binaries Placeholder => binary payload map, passed on to parseValue()
 * @return array Parsed properties keyed by property name
 */
function parseObject(string $content, array $binaries): array {
    $fields = [];

    foreach (splitRespectingNesting($content, RS) as $pair) {
        $segments = splitRespectingNesting($pair, US);
        if (count($segments) < 2) {
            continue;
        }

        // First segment is the key; re-join the remainder to recover the value.
        $name = array_shift($segments);
        $fields[$name] = parseValue(implode(US, $segments), $binaries);
    }

    return $fields;
}

// Self-test when run directly
// Runs only under the CLI SAPI and only when the script named on the command
// line resolves to this very file; otherwise nothing below executes.
if (php_sapi_name() === 'cli' && isset($argv[0]) && realpath($argv[0]) === __FILE__) {
    echo str_repeat('=', 50) . "\n";
    echo "HSV Parser Tests (PHP)\n";
    echo str_repeat('=', 50) . "\n";

    // Counters updated by test() through `global`.
    $passed = 0;
    $failed = 0;

    /**
     * Run one named test case and print its outcome.
     *
     * The case passes when $fn returns without throwing; any Throwable is
     * reported with its message and counted as a failure.
     */
    function test(string $name, callable $fn): void {
        global $passed, $failed;
        try {
            $fn();
            echo "✓ {$name}\n";
            $passed++;
        } catch (\Throwable $e) {
            echo "✗ {$name}: {$e->getMessage()}\n";
            $failed++;
        }
    }

    /**
     * Throw unless $expected and $actual are identical (strict !== comparison).
     *
     * The exception message is $msg when given, otherwise both values rendered
     * with json_encode().
     */
    function assertEquals($expected, $actual, string $msg = ''): void {
        if ($expected !== $actual) {
            throw new \Exception($msg ?: "Expected " . json_encode($expected) . ", got " . json_encode($actual));
        }
    }

    // One STX...ETX block holding a single record with two properties.
    test('Basic parsing', function() {
        $result = parse(STX . "name" . US . "Alice" . RS . "age" . US . "30" . ETX);
        assertEquals(1, count($result['records']));
        assertEquals('Alice', $result['records'][0]['name']);
        assertEquals('30', $result['records'][0]['age']);
    });

    // FS separates records within one data block.
    test('Multiple records', function() {
        $result = parse(STX . "name" . US . "Alice" . FS . "name" . US . "Bob" . ETX);
        assertEquals(2, count($result['records']));
    });

    // GS inside a value produces a list.
    test('Array values', function() {
        $result = parse(STX . "tags" . US . "a" . GS . "b" . GS . "c" . ETX);
        assertEquals(['a', 'b', 'c'], $result['records'][0]['tags']);
    });

    // SOH...STX carries header properties ahead of the data block.
    test('SOH header', function() {
        $result = parse(SOH . "hsv" . US . "1.0" . RS . "type" . US . "users" . STX . "name" . US . "Alice" . ETX);
        assertEquals('1.0', $result['header']['hsv']);
        assertEquals('users', $result['header']['type']);
        assertEquals(1, count($result['records']));
    });

    // A value wrapped in SO...SI is parsed as a nested object.
    test('SO/SI nesting', function() {
        $result = parse(STX . "user" . US . SO . "name" . US . "Alice" . RS . "email" . US . "a@b.com" . SI . ETX);
        assertEquals('Alice', $result['records'][0]['user']['name']);
        assertEquals('a@b.com', $result['records'][0]['user']['email']);
    });

    // Nested objects may themselves contain nested objects.
    test('Deep nesting', function() {
        $result = parse(STX . "data" . US . SO . "level1" . US . SO . "level2" . US . "deep" . SI . SI . ETX);
        assertEquals('deep', $result['records'][0]['data']['level1']['level2']);
    });

    // The payload contains STX and ETX bytes; inside DLE+STX...DLE+ETX they
    // must come back as data rather than end the block.
    test('DLE binary mode', function() {
        $binaryData = "raw" . STX . "data" . ETX . "here";
        $result = parse(STX . "type" . US . "image" . RS . "data" . US . DLE . STX . $binaryData . DLE . ETX . ETX);
        assertEquals('image', $result['records'][0]['type']);
        assertEquals($binaryData, $result['records'][0]['data']);
    });

    // A literal DLE in the payload is sent doubled and must decode to one DLE.
    test('DLE escaping', function() {
        $binaryData = "has" . DLE . "dle";
        $escaped = str_replace(DLE, DLE . DLE, $binaryData);
        $result = parse(STX . "data" . US . DLE . STX . $escaped . DLE . ETX . ETX);
        assertEquals($binaryData, $result['records'][0]['data']);
    });

    // Newlines are ordinary value bytes.
    test('Newlines in values', function() {
        $result = parse(STX . "text" . US . "line1\nline2\nline3" . ETX);
        assertEquals("line1\nline2\nline3", $result['records'][0]['text']);
    });

    // Double quotes are ordinary value bytes.
    test('Quotes (no escaping)', function() {
        $result = parse(STX . "msg" . US . 'He said "hello"' . ETX);
        assertEquals('He said "hello"', $result['records'][0]['msg']);
    });

    // Text before STX and after ETX is skipped.
    test('Mixed content', function() {
        $result = parse("ignored" . STX . "name" . US . "Alice" . ETX . "also ignored");
        assertEquals(1, count($result['records']));
        assertEquals('Alice', $result['records'][0]['name']);
    });

    // Records from several data blocks accumulate into one list.
    test('Multiple blocks', function() {
        $result = parse(STX . "a" . US . "1" . ETX . "junk" . STX . "b" . US . "2" . ETX);
        assertEquals(2, count($result['records']));
    });

    // A GS inside a nested object builds an array within that object.
    test('Nested structure with array', function() {
        $result = parse(STX . "user" . US . SO . "name" . US . "Alice" . RS . "tags" . US . "admin" . GS . "user" . SI . ETX);
        assertEquals('Alice', $result['records'][0]['user']['name']);
        assertEquals(['admin', 'user'], $result['records'][0]['user']['tags']);
    });

    echo str_repeat('=', 50) . "\n";
    echo "{$passed} passed, {$failed} failed\n";
    echo str_repeat('=', 50) . "\n";

    // Exit status 1 when any test failed, 0 otherwise.
    exit($failed > 0 ? 1 : 0);
}
